* ---------------------------------------------------------------------------
* Session setup for the descriptive data figures.
* Clears memory, raises the variable limit, moves to the replication folder,
* opens the log for this script and selects the graph scheme.
* ---------------------------------------------------------------------------
clear all
set maxvar 5000

* NOTE(review): the working directory below is an absolute, machine-specific
* path. Replicators must edit it (or launch Stata from the replication root
* and comment it out). All later paths are relative to it.
*cd "1_Data/"
cd "/Users/tg2778/Dropbox/0_Reviews_RnRs/072022_JOP_Roads/v3_JOP/Replication - Roads"

* Close any log left open by an earlier (possibly interrupted) run, then
* overwrite the previous log file. Without -replace- a second run of this
* do-file stops with "file already exists".
capture log close
log using "4_Log/6_DataFigs.log", replace

* NOTE(review): plotplain is not a built-in Stata scheme; it has to be
* installed beforehand (presumably from the blindschemes package - confirm).
set scheme plotplain

* ---------------------------------------------------------------------------
* Fig 3B: histogram of percenteligiblevillage500 across constituencies.
* ---------------------------------------------------------------------------
use "1_Data/AC_pre-delim_dataset.dta", clear

* Sample restrictions.
* A reported vote count of 0 marks an uncontested election; rows with zero or
* missing (.) votes or turnout-type percentages are removed, one variable at a
* time and in the same order as before.
foreach v of varlist votes_1 votes1_t1 pollpercentc_1 pollpercentc1_t1 {
    drop if `v'==0 | `v'==.
}

* Drop constituencies with very few villages (fewer than 20).
* Earlier thresholds that were tried and abandoned:
*   drop if nvillagesabs<=10
*   drop if nvillagesabs<10 & census_totpop<.05*nelectors
* Rows with missing nvillagesabs are retained, because missing compares as
* larger than any number.
keep if nvillagesabs>=20

* Kerala has an odd village structure, so it is excluded.
keep if state!="kerala"

* Restrict to group code t01.
keep if groupcode=="t01"

histogram percenteligiblevillage500, ///
    frequency ///
    ylabel(, format(%13.0g)) ///
    xtitle("% villages in a constituency that are over 500/250 population") ///
    name(insthist)
graph export "3_Figs/Fig3B.pdf", as(pdf) replace

* ---------------------------------------------------------------------------
* Fig 2A / 2B: village-level population histogram and binned scatter of
* PMGSY road status against 2001 population. Closes the log at the end.
* ---------------------------------------------------------------------------
use "1_Data/censusvillageroads.dta", clear

* Numeric identifiers for states and for state-district pairs. These are
* created before any rows are dropped so the group codes cover the full file.
* (They are not used by the figures in this section.)
egen statename = group(state)
egen statedistrict = group(districtid stateid)

* 1 if the village has a positive, non-missing road code, 0 otherwise.
gen pmgsyroad = (roadcode>0 & roadcode!=.)

* Sanction-period indicator: 1 for periods 1-2, 0 for periods 3-4 or a
* missing period, and missing for any other value.
* NOTE(review): the name suggests "sanctioned by 2008" - confirm the coding
* of roadsanction_period.
gen pmgsyroad2008 = .
replace pmgsyroad2008 = 1 if roadsanction_period==1 | roadsanction_period==2
replace pmgsyroad2008 = 0 if roadsanction_period==. | roadsanction_period==3 | roadsanction_period==4

* Exclude state 32; rows with a missing stateid are retained.
* NOTE(review): presumably state 32 is Kerala - confirm against the state codebook.
drop if stateid==32

* Baseline (2001) characteristics of the village:
* paved approach road indicator and total population.
gen pavedroad2001 = pc01_vd_app_pr
gen villagepop = pc01_vd_t_p

* Population centred on the 500 threshold.
gen popcutoff500 = villagepop - 500

set scheme plotplain

* Fig 2B. Both figures use villages with population between 1 and 3000;
* inrange() is false for missing population, as was the two-sided comparison.
* NOTE(review): binscatter is a user-written command and must be installed.
binscatter pmgsyroad villagepop if inrange(villagepop, 1, 3000), ///
    nquantiles(20) ///
    xtitle("Population (Census 2001)") ///
    ytitle("Probability of PMGSY connectivity by 2018") ///
    linetype(none) msymbol(solid) ///
    xlabel(0(250)3000, nogrid)
graph export "3_Figs/Fig2B.pdf", as(pdf) replace

* Fig 2A.
histogram villagepop if inrange(villagepop, 1, 3000), ///
    frequency ///
    ylabel(, format(%13.0g)) ///
    xtitle("Population (Census 2001)")
graph export "3_Figs/Fig2A.pdf", as(pdf) replace

log close
